/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */ /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */ package net.nutch.indexer; import java.util.Date; import java.io.File; import java.io.IOException; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.index.IndexWriter; import net.nutch.util.LogFormatter; import java.util.logging.Logger; import java.util.logging.Level; /** Creates an index for the output corresponding to a single fetcher run. */ public class IndexMerger { public static final Logger LOG = LogFormatter.getLogger("net.nutch.indexer.IndexMerger"); public static final String DONE_NAME = "merge.done"; private File indexDirectory; private File[] segments; public IndexMerger(File indexDirectory, File[] segments) { this.indexDirectory = indexDirectory; this.segments = segments; } private void merge() throws IOException { Directory[] dirs = new Directory[segments.length]; for (int i = 0; i < segments.length; i++) dirs[i] = FSDirectory.getDirectory(new File(segments[i],"index"), false); IndexWriter writer = new IndexWriter(indexDirectory, null, true); writer.mergeFactor = 50; writer.infoStream = LogFormatter.getLogStream(LOG, Level.INFO); writer.setUseCompoundFile(false); writer.setSimilarity(new NutchSimilarity()); writer.addIndexes(dirs); writer.close(); } /** Create an index for the input files in the named directory. */ public static void main(String[] args) throws Exception { File indexDirectory; String usage = "IndexMerger indexDirectory segments..."; if (args.length < 2) { System.err.println("Usage: " + usage); return; } indexDirectory = new File(args[0]); File[] segments = new File[args.length - 1]; for (int i = 1; i < args.length; i++) { segments[i-1] = new File(args[i]); } LOG.info("merging segment indexes to: " + indexDirectory); IndexMerger merger = new IndexMerger(indexDirectory, segments); merger.merge(); LOG.info("done merging"); } }